In [1]:
Copied!
import math
from dataclasses import dataclass
from typing import Callable
from chalk import Path, hcat, make_path, set_svg_draw_height, set_svg_height
from colour import Color
from mt_diagrams.mlprimer_draw import (
base_model,
compare,
draw_graph,
draw_with_hard_points,
graph,
s,
s1,
s1_hard,
s2,
s2_hard,
show,
show_loss,
split_graph,
with_points,
)
from mt_diagrams.plots import plot_function, plot_function3D
import minitorch
set_svg_draw_height(300)
import math
from dataclasses import dataclass
from typing import Callable
from chalk import Path, hcat, make_path, set_svg_draw_height, set_svg_height
from colour import Color
from mt_diagrams.mlprimer_draw import (
base_model,
compare,
draw_graph,
draw_with_hard_points,
graph,
s,
s1,
s1_hard,
s2,
s2_hard,
show,
show_loss,
split_graph,
with_points,
)
from mt_diagrams.plots import plot_function, plot_function3D
import minitorch
set_svg_draw_height(300)
Module 1.1 - Learning With Derivatives¶
Training Data¶
- Set of datapoints, each $(x,y)$
In [2]:
Copied!
# Plot the two labeled training point sets side by side (cell body duplicated by the notebook export).
split_graph(s1, s2)
split_graph(s1, s2)
Out[2]:
In [3]:
Copied!
# Preview: the forward pass of the linear model; the full class is defined below.
def forward(self, x1: float, x2: float) -> float:
return self.w1 * x1 + self.w2 * x2 + self.b
def forward(self, x1: float, x2: float) -> float:
return self.w1 * x1 + self.w2 * x2 + self.b
Model 1¶
- Linear Model
In [4]:
Copied!
@dataclass
class Linear:
    """Linear model m(x1, x2) = w1*x1 + w2*x2 + b."""

    # Parameters
    w1: float
    w2: float
    b: float

    def forward(self, x1: float, x2: float) -> float:
        """Evaluate the model at the point (x1, x2)."""
        weighted = self.w1 * x1 + self.w2 * x2
        return weighted + self.b


@dataclass
class Linear:
    """Linear model m(x1, x2) = w1*x1 + w2*x2 + b."""

    # Parameters
    w1: float
    w2: float
    b: float

    def forward(self, x1: float, x2: float) -> float:
        """Evaluate the model at the point (x1, x2)."""
        weighted = self.w1 * x1 + self.w2 * x2
        return weighted + self.b
Decision Boundary: Model 1¶
In [5]:
Copied!
# Draw the decision boundary of a concrete linear model.
model = Linear(1, 1, -0.9)
draw_graph(model)
model = Linear(1, 1, -0.9)
draw_graph(model)
Out[5]:
Distance Determines Fit¶
- $|m(x)|$ / correct or incorrect
In [6]:
Copied!
# Overlay the training points on the model's decision boundary.
with_points(s1, s2, Linear(1, 1, -0.4))
with_points(s1, s2, Linear(1, 1, -0.4))
Out[6]:
Log Sigmoid Loss¶
In [7]:
Copied!
def point_loss(x):
    """Log-sigmoid loss: -log(sigmoid(-x)); small for very negative x, growing with x."""
    prob = minitorch.operators.sigmoid(-x)
    return -math.log(prob)


def point_loss(x):
    """Log-sigmoid loss: -log(sigmoid(-x)); small for very negative x, growing with x."""
    prob = minitorch.operators.sigmoid(-x)
    return -math.log(prob)
In [8]:
Copied!
# Plot the point-loss curve with no highlighted points.
graph(point_loss, [], [])
graph(point_loss, [], [])
Out[8]:
In [9]:
Copied!
# Same loss curve, highlighting three example distances.
graph(point_loss, [], [-2, -0.2, 1])
graph(point_loss, [], [-2, -0.2, 1])
Out[9]:
Lecture Quiz¶
Outline¶
- Model Fit
- Symbolic Derivatives
- Numerical Derivatives
- Module 1
Model Fitting¶
Class Goal¶
- Find parameters that minimize loss
In [10]:
Copied!
# Compare three models that differ only in bias, laid out horizontally.
hcat(
[show(Linear(1, 1, -0.6)), show(Linear(1, 1, -0.7)), show(Linear(1, 1, -0.8))], 0.3
)
hcat(
[show(Linear(1, 1, -0.6)), show(Linear(1, 1, -0.7)), show(Linear(1, 1, -0.8))], 0.3
)
Out[10]:
Numerical Optimization¶
- Many, many different approaches
- Our focus: gradient descent
- Workhorse of modern machine learning
Iterative Parameter Fitting¶
- Compute the loss function, $L(w_1, w_2, b)$
- See how small changes would change the loss
- Update the parameters to locally reduce the loss
Example: Update Bias¶
In [11]:
Copied!
# Two models that differ only in the bias parameter.
model1 = Linear(1, 1, -0.4)
model2 = Linear(1, 1, -0.5)
model1 = Linear(1, 1, -0.4)
model2 = Linear(1, 1, -0.5)
In [12]:
Copied!
# Visualize the effect of the bias update side by side.
compare(model1, model2)
compare(model1, model2)
Out[12]:
Step 1: Compute Loss¶
In [13]:
Copied!
# A poorly fit model: the boundary sits far from the data.
with_points(s1, s2, Linear(1, 1, -1.5))
with_points(s1, s2, Linear(1, 1, -1.5))
Out[13]:
In [14]:
Copied!
def point_loss(out, y=1):
    """Log-sigmoid loss on the signed distance `out`, scaled by the label weight `y`."""
    correct_side = minitorch.operators.sigmoid(-out)  # log-sigmoid of the distance
    return y * -math.log(correct_side)


def point_loss(out, y=1):
    """Log-sigmoid loss on the signed distance `out`, scaled by the label weight `y`."""
    correct_side = minitorch.operators.sigmoid(-out)  # log-sigmoid of the distance
    return y * -math.log(correct_side)
Full Loss¶
In [15]:
Copied!
# Total loss of model `m` over the dataset `s` (features s.X, labels s.y).
# NOTE(review): the forward output is negated before point_loss and the sum is
# negated again on return — a double sign flip; presumably this matches the
# convention expected by show_loss. TODO confirm.
def full_loss(m):
l = 0
for x, y in zip(s.X, s.y):
l += point_loss(-m.forward(*x), y)
return -l
def full_loss(m):
l = 0
for x, y in zip(s.X, s.y):
l += point_loss(-m.forward(*x), y)
return -l
In [16]:
Copied!
# Left: loss vs distance; right: the same loss with the sign of the distance flipped.
hcat(
[
graph(point_loss, [], [-2, -0.2, 1]),
graph(lambda x: point_loss(-x), [-1, 0.4, 1.3], []),
],
0.3,
)
hcat(
[
graph(point_loss, [], [-2, -0.2, 1]),
graph(lambda x: point_loss(-x), [-1, 0.4, 1.3], []),
],
0.3,
)
Out[16]:
Step 2: Find Direction of Improvement¶
In [17]:
Copied!
# Small change in bias (-1.5 -> -1.45): check which direction improves the fit.
hcat([show(Linear(1, 1, -1.5)), show(Linear(1, 1, -1.45))], 0.3)
hcat([show(Linear(1, 1, -1.5)), show(Linear(1, 1, -1.45))], 0.3)
Out[17]:
Step 3: Update Parameters Iteratively¶
In [18]:
Copied!
# Plot the full loss as the model's parameters vary.
set_svg_height(300)
show_loss(full_loss, Linear(1, 1, 0))
set_svg_height(300)
show_loss(full_loss, Linear(1, 1, 0))
Out[18]:
Our Challenge¶
How do we find the right direction?
Symbolic Derivatives¶
Review: What is a Derivative?¶
How small changes in input impact output.
- $f(x)$ - function
- $x$ - point
- $f'(x)$ - "rise/run"
Review: Derivative¶
$$f(x) = x^2 + 1$$
In [19]:
Copied!
def f(x):
    """Example quadratic: f(x) = x^2 + 1."""
    squared = x * x
    return squared + 1.0
# Plot f; the definition below is a duplicate copy from the notebook export.
plot_function("f(x)", f)
def f(x):
return x * x + 1.0
plot_function("f(x)", f)
Review: Derivative¶
$$f(x) = x^2 + 1$$ $$f'(x) = 2x$$
In [20]:
Copied!
def f_prime(x):
    """Symbolic derivative of f: f'(x) = 2x."""
    return 2 * x


def tangent_line(slope, x, y):
    """Build the line through (x, y) with the given slope."""

    def line(x_):
        delta = x_ - x
        return slope * delta + y

    return line
# Plot f against its tangent line at x = 2; the definitions below are a
# duplicate copy from the notebook export.
plot_function("f(x) vs f'(2)", f, fn2=tangent_line(f_prime(2), 2, f(2)))
def f_prime(x):
return 2 * x
def tangent_line(slope, x, y):
def line(x_):
return slope * (x_ - x) + y
return line
plot_function("f(x) vs f'(2)", f, fn2=tangent_line(f_prime(2), 2, f(2)))
Symbolic Derivative¶
- Standard high-school derivatives
- Rewrite $f$ to new form $f'$
- Produces mathematical function
Example Function¶
$$f(x) = \sin(2 x)$$
In [21]:
Copied!
# Plot the example function f(x) = sin(2x).
plot_function("f(x) = sin(2x)", lambda x: math.sin(2 * x))
plot_function("f(x) = sin(2x)", lambda x: math.sin(2 * x))
Symbolic Derivative¶
$$f(x) = \sin(2 x) \Rightarrow f'(x) = 2 \cos(2 x)$$
In [22]:
Copied!
# Plot the symbolic derivative 2*cos(2x) against the original sin(2x).
plot_function(
"f'(x) = 2*cos(2x)", lambda x: 2 * math.cos(2 * x), fn2=lambda x: math.sin(2 * x)
)
plot_function(
"f'(x) = 2*cos(2x)", lambda x: 2 * math.cos(2 * x), fn2=lambda x: math.sin(2 * x)
)
Multiple Arguments¶
$$f(x, y) = \sin(x) + 2 \cos(y)$$
In [23]:
Copied!
# 3D surface of the two-argument function f(x, y) = sin(x) + 2*cos(y).
plot_function3D(
"f(x, y) = sin(x) + 2 * cos(y)", lambda x, y: math.sin(x) + 2 * math.cos(y)
)
plot_function3D(
"f(x, y) = sin(x) + 2 * cos(y)", lambda x, y: math.sin(x) + 2 * math.cos(y)
)
Derivatives with Multiple Arguments¶
$$f_x'(x, y) = \cos(x) \ \ \ f_y'(x, y) = -2 \sin(y)$$
In [24]:
Copied!
# Partial derivative with respect to x: cos(x), constant in y.
plot_function3D("f'_x(x, y) = cos(x)", lambda x, y: math.cos(x))
plot_function3D("f'_x(x, y) = cos(x)", lambda x, y: math.cos(x))
Numerical Derivatives¶
In [25]:
Copied!
# Placeholder signature: a scalar function of one variable.
def f(x: float) -> float:
...
def f(x: float) -> float:
...
Derivative as higher-order function¶
$$f(x) = ...$$ $$f'(x) = ...$$
In [26]:
Copied!
# Placeholder: `derivative` lifts a function to its derivative (body filled in later).
def derivative(f: Callable[[float], float]) -> Callable[[float], float]:
def f_prime(x: float) -> float:
...
return f_prime
def derivative(f: Callable[[float], float]) -> Callable[[float], float]:
def f_prime(x: float) -> float:
...
return f_prime
Definition of Derivative¶
$$f'(x) = \lim_{\epsilon \rightarrow 0} \frac{f(x + \epsilon) - f(x - \epsilon)}{2\epsilon}$$
Central Difference¶
Approximate derivative
$$f'(x) \approx \frac{f(x + \epsilon) - f(x-\epsilon)}{2\epsilon}$$

Approximating Derivative¶
Key Idea: Only need to call $f$.
In [27]:
Copied!
# Placeholder for the central-difference approximation (implemented in minitorch).
def central_difference(f: Callable[[float], float], x: float) -> float:
...
def central_difference(f: Callable[[float], float], x: float) -> float:
...
Derivative as higher-order function¶
$$f(x) = ...$$ $$f'(x) = ...$$
In [28]:
Copied!
# `derivative` implemented numerically via minitorch's central difference.
def derivative(f: Callable[[float], float]) -> Callable[[float], float]:
def f_prime(x: float) -> float:
return minitorch.central_difference(f, x)
return f_prime
def derivative(f: Callable[[float], float]) -> Callable[[float], float]:
def f_prime(x: float) -> float:
return minitorch.central_difference(f, x)
return f_prime
Advanced: Multiple Arguments¶
Turn 2-argument function into 1-arg.
In [29]:
Copied!
# Partial derivative in x: fix y with a closure, reducing f to a one-argument
# function, then apply the one-argument `derivative`.
def f(x, y):
...
def f_x_prime(x: float, y: float) -> float:
def inner(x: float) -> float:
return f(x, y)
return derivative(inner)(x)
def f(x, y):
...
def f_x_prime(x: float, y: float) -> float:
def inner(x: float) -> float:
return f(x, y)
return derivative(inner)(x)
Example¶
In [30]:
Copied!
def sigmoid(x: float) -> float:
    """Numerically stable logistic function 1 / (1 + e^-x)."""
    if x < 0:
        # For negative x, use the exp(x) form to avoid overflow in exp(-x).
        e = math.exp(x)
        return e / (1.0 + e)
    return 1.0 / (1.0 + math.exp(-x))
# Plot sigmoid; the definition below is a duplicate copy from the notebook export.
plot_function("sigmoid", sigmoid)
def sigmoid(x: float) -> float:
if x >= 0:
return 1.0 / (1.0 + math.exp(-x))
else:
return math.exp(x) / (1.0 + math.exp(x))
plot_function("sigmoid", sigmoid)
Example¶
In [31]:
Copied!
# Numerical derivative of sigmoid obtained via the higher-order `derivative`.
sigmoid_prime = derivative(sigmoid)
plot_function("Derivative of sigmoid", sigmoid_prime)
sigmoid_prime = derivative(sigmoid)
plot_function("Derivative of sigmoid", sigmoid_prime)
Symbolic¶
- Transformation of mathematical function
- Gives full form of derivative
- Utilizes mathematical identities
Numerical¶
- Only requires evaluating function
- Computes derivative at a point
- Can be applied to fully black-box function
Next Class: Autodifferentiation¶
- Computes derivatives on the program's trace
- Efficient for large number of parameters
- Works directly on python code
Module-1¶
Module-1 Learning Objectives¶
- Practical understanding of derivatives
- Dive into autodifferentiation
- Parameters and their usage
Module-1: What is it?¶
- Numerical and symbolic derivatives
- Implement our numerical class
- Implement autodifferentiation
- Everything is scalars for now (no "gradients")